import os
from PIL import Image
import pytesseract
import shutil

def has_text(image_path, lang='eng'):
    """Return True if Tesseract OCR finds non-whitespace text in an image.

    Args:
        image_path: Path of the image file to inspect.
        lang: Tesseract language code passed to ``image_to_string``.

    Returns:
        True when the recognized text contains at least one non-whitespace
        character. False when nothing is recognized, or when the image
        cannot be opened or processed (the error is printed, not raised).
    """
    try:
        # Open via a context manager so the underlying file handle is closed
        # as soon as OCR finishes instead of lingering until garbage
        # collection; callers may move or delete the file right afterwards.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image, lang=lang)
    except Exception as e:
        # Deliberately best-effort: any failure is reported and treated as
        # "no text" so that a single bad file does not abort a batch run.
        print(f"Error occurred while processing image: {e}")
        return False
    else:
        # Whitespace-only OCR output counts as "no text".
        return bool(text.strip())

def move_images_without_text(source_folder, destination_folder, lang='eng',
                             image_extensions=None):
    """Move every image without detectable text out of ``source_folder``.

    Only regular files directly inside ``source_folder`` are considered;
    sub-directories are skipped. Files whose extension is not a known image
    extension are left untouched, so stray non-image files (hidden system
    files, notes, ...) are no longer moved just because OCR failed on them.

    Args:
        source_folder: Directory containing the images to screen.
        destination_folder: Directory that receives the images in which
            ``has_text`` finds no text. Created if it does not exist.
        lang: Tesseract language code forwarded to ``has_text``.
        image_extensions: Optional iterable of file extensions (with the
            leading dot, e.g. ``".png"``) to treat as images; compared
            case-insensitively. Defaults to a set of common raster formats.
    """
    if image_extensions is None:
        image_extensions = (
            ".bmp", ".gif", ".jpeg", ".jpg", ".pbm", ".pgm",
            ".png", ".ppm", ".tif", ".tiff", ".webp",
        )
    allowed_extensions = {ext.lower() for ext in image_extensions}

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(destination_folder, exist_ok=True)

    for filename in os.listdir(source_folder):
        image_path = os.path.join(source_folder, filename)

        # Skip directories (including a destination nested in the source).
        if not os.path.isfile(image_path):
            continue

        # Skip non-image files before running OCR on them.
        extension = os.path.splitext(filename)[1].lower()
        if extension not in allowed_extensions:
            continue

        if not has_text(image_path, lang=lang):
            destination_path = os.path.join(destination_folder, filename)
            shutil.move(image_path, destination_path)
            print(f"Moved image: {filename}")

if __name__ == "__main__":
    # Folder holding the images to be screened.
    source_folder = "/Users/eee/Downloads/mengli/mengli-inputdata"

    # Folder that receives the images in which no text was detected.
    destination_folder = "/Users/eee/Downloads/mengli/mengli-inputdata/eng"

    # Screen the source folder and move out every image without text.
    move_images_without_text(
        source_folder=source_folder,
        destination_folder=destination_folder,
    )